# Script setup.
#
# No `rm(list = ls())` here on purpose: it would wipe the caller's global
# environment whenever this file is source()d from another script, and it does
# not produce a clean session anyway (attached packages and options persist).
# Run the script in a fresh R session (e.g. with Rscript) instead.

# Declare where this script lives relative to the project root so that here()
# resolves paths independently of the current working directory.
here::i_am(file.path("code", "00_summary_stats.R"))
library(here)

# NOTE(review): config.R presumably attaches the packages used below and
# defines `tab_dir` and `linesep()` -- confirm against code/config.R.
source(here("code", "config.R"))

#################################################################################
# Load cards, census, analysis data

# Cards: keep only the records flagged as eligible for census matching.
cards <- here("data", "analysis", "cards.parquet") |>
  read_parquet() |>
  filter(eligible_for_match_census)

# Card-to-census links: read just the two link columns and rename the match
# column so it can serve as the join key against the census file.
matches <- here("data", "analysis", "cards_census_matches.parquet") |>
  read_parquet(col_select = c("card_id", "standard_match_0")) |>
  rename(census_id = standard_match_0)

# Census records with any linked card attached, plus derived indicators.
# NOTE(review): both joins assume at most one card per census_id; if a census
# record can link to several cards the rows (and table Ns) are duplicated --
# verify against the matching step.
census <- here("data", "analysis", "census.parquet") |>
  read_parquet() |>
  left_join(matches, by = "census_id") |>
  left_join(cards, by = "card_id") |>
  mutate(
    # Veteran by either source. Without a card-based flag, fall back to the
    # census flag alone. This must be computed before vet_cards is zero-filled
    # below, because it relies on vet_cards still being NA for those rows.
    vet_combined = ifelse(
      is.na(vet_cards),
      vet_c1930,
      as.integer(vet_cards | vet_c1930)
    ),
    vet_cards = replace_na(vet_cards, 0)
  ) |>
  mutate(
    age = 1930 - birthyr_c1930,
    # NOTE(review): the comparison codes below are presumably the IPUMS
    # codings for "owned", "at work" and "reads and writes" -- confirm.
    ownhome = ownershp_c1930 == 10,
    employed = empstat_c1930 == 10,
    literate = lit_c1930 == 4,
    # An occupational score of 0 is treated as missing.
    occscore = na_if(occscore_c1930, 0)
  )

# Analysis sample; used as the "Linked" column of the cards table.
est_df <- here("data", "analysis", "analysis.parquet") |>
  read_parquet()

#################################################################################
# Cards summary stats table

# Means (and SDs in parentheses on the following row) for the card samples,
# written as a LaTeX table to cards_summary.tex in `tab_dir`. Column (1) is
# the eligible cards, column (2) is est_df.
bind_rows(
  cards |> mutate(grp = "allcards"),
  est_df |> mutate(grp = "linked")
) |>
  # Strip underscores from the variable names: separate() below splits the
  # summarised "<var>_<stat>" names on non-alphanumeric characters, so each
  # name may contain only the single "_" in front of the statistic.
  rename(
    farmlaborer = farm_laborer,
    vetcensus = vet_c1930,
    vetcards = vet_cards,
    vetcombined = vet_combined,
    marriedcards = married_cards
  ) |>
  mutate(age = 1917 - birthyr_cards) |>
  group_by(grp) |>
  summarize(
    across(
      c(
        age, exemption, marriedcards, farmer, laborer, farmlaborer,
        vetcards, vetcensus, vetcombined
      ),
      .fns = list(
        mean = \(x) mean(x, na.rm = TRUE),
        sd = \(x) sd(x, na.rm = TRUE)
      )
    ),
    n = n()
  ) |>
  pivot_longer(cols = -grp) |>
  # "n" has no "_<stat>" suffix; fill = "right" leaves its type as NA without
  # emitting a "missing pieces" warning on every run.
  separate(col = name, into = c("var", "type"), fill = "right") |>
  mutate(
    type = ifelse(var == "n", "", type),
    value = case_when(
      # Statistics that cannot be computed (e.g. a variable that is entirely
      # missing in one sample) print as "--" with a blank SD row.
      is.na(value) & type == "sd" ~ "",
      is.na(value) & type != "sd" ~ "--",
      type == "sd" ~ paste0("(", formatC(value, format = "f", digits = 2), ")"),
      var == "n" ~ formatC(value, format = "f", big.mark = ",", digits = 0),
      TRUE ~ formatC(value, format = "f", digits = 2)
    )
  ) |>
  pivot_wider(id_cols = c(var, type), names_from = grp, values_from = value) |>
  # SD rows sit directly under their mean row and carry no label.
  mutate(var = ifelse(type == "sd", "", var)) |>
  # Select the columns by name so the headers below cannot drift out of sync
  # with the data if the group ordering ever changes.
  select(var, allcards, linked) |>
  mutate(
    var = recode(
      var,
      age = "Age in 1917",
      exemption = "Exemption claim",
      marriedcards = "Married in 1917",
      farmer = "Farmer",
      laborer = "Laborer",
      farmlaborer = "Farm laborer",
      vetcensus = "Veteran (Census)",
      vetcards = "Veteran (VAMI/ATS)",
      vetcombined = "Veteran (Any)",
      n = "Observations"
    )
  ) |>
  knitr::kable(
    col.names = linebreak(
      c("", "All\n(1)", "Linked\n(2)"),
      align = "c"
    ),
    align = c("l", "c", "c"),
    booktabs = TRUE,
    digits = 2,
    linesep = linesep(2),
    format = "latex",
    escape = FALSE
  ) |>
  save_kable(file.path(tab_dir, "cards_summary.tex"))

#################################################################################
# NAACP summary stats table

# Write a two-column summary table (means, with SDs in parentheses on the
# following row) comparing NAACP and non-NAACP census records.
#
# data:      census rows to summarise, already filtered to the sample of
#            interest; must contain is_naacp and the summarised variables.
# file_name: name of the .tex file to create inside `tab_dir`.
#
# The groups are labelled explicitly and the output columns are selected by
# name. Relying on the implicit ordering of group_by()/pivot_wider() would put
# the FALSE/0 group first, i.e. under the "NAACP (1)" header.
# NOTE(review): assumes is_naacp is logical or 0/1 coded -- confirm.
write_naacp_summary <- function(data, file_name) {
  stopifnot(
    "is_naacp must be coercible to TRUE/FALSE with no missing values" =
      !anyNA(as.logical(data$is_naacp))
  )

  data |>
    mutate(grp = ifelse(as.logical(is_naacp), "naacp", "notnaacp")) |>
    # Strip underscores from the variable names: separate() below splits the
    # summarised "<var>_<stat>" names on non-alphanumeric characters.
    rename(
      marriedcensus = married_c1930,
      vetcensus = vet_c1930,
      vetcards = vet_cards,
      vetcombined = vet_combined
    ) |>
    group_by(grp) |>
    summarize(
      across(
        c(
          age, employed, marriedcensus, literate, occscore, ownhome,
          vetcards, vetcensus, vetcombined
        ),
        .fns = list(
          mean = \(x) mean(x, na.rm = TRUE),
          sd = \(x) sd(x, na.rm = TRUE)
        )
      ),
      n = n()
    ) |>
    pivot_longer(cols = -grp) |>
    # "n" has no "_<stat>" suffix; fill = "right" leaves its type as NA
    # without emitting a "missing pieces" warning on every run.
    separate(col = name, into = c("var", "type"), fill = "right") |>
    mutate(
      type = ifelse(var == "n", "", type),
      value = case_when(
        # Statistics that cannot be computed print as "--" with a blank SD row.
        is.na(value) & type == "sd" ~ "",
        is.na(value) & type != "sd" ~ "--",
        type == "sd" ~ paste0("(", formatC(value, format = "f", digits = 2), ")"),
        var == "n" ~ formatC(value, format = "f", big.mark = ",", digits = 0),
        TRUE ~ formatC(value, format = "f", digits = 2)
      )
    ) |>
    pivot_wider(id_cols = c(var, type), names_from = grp, values_from = value) |>
    # SD rows sit directly under their mean row and carry no label.
    mutate(var = ifelse(type == "sd", "", var)) |>
    # Column order must match the headers passed to kable() below.
    select(var, naacp, notnaacp) |>
    mutate(
      var = recode(
        var,
        age = "Age in 1930",
        marriedcensus = "Married in 1930",
        employed = "Employed",
        literate = "Literate",
        occscore = "Occup. income",
        ownhome = "Owns home",
        vetcensus = "Veteran (Census)",
        vetcards = "Veteran (VAMI/ATS)",
        vetcombined = "Veteran (Any)",
        n = "Observations"
      )
    ) |>
    knitr::kable(
      col.names = linebreak(
        c("", "NAACP\n(1)", "Not NAACP\n(2)"),
        align = "c"
      ),
      align = c("l", "c", "c"),
      booktabs = TRUE,
      digits = 2,
      linesep = linesep(2),
      format = "latex",
      escape = FALSE
    ) |>
    save_kable(file.path(tab_dir, file_name))
}

# Census records aged 21 or older in 1930.
census |>
  filter(age >= 21) |>
  write_naacp_summary("naacp_summary.tex")

# Table that cuts to birth years 1886 to 1896 and to people living in NAACP
# areas.
census |>
  filter(birthyr_c1930 >= 1886, birthyr_c1930 <= 1896, is_naacp_area_c1930) |>
  write_naacp_summary("naacp_summary_restricted.tex")
